# Load the commit data.
df <- read.csv(
  "input/commits.csv",
  header = TRUE,
  sep = ","
)
df

# Log-transform the counts; the +1 keeps zero counts finite.
df$count <- log(df$count + 1)
df

# Standardize (center and scale) the log-transformed count column.
cols <- "count"
df[cols] <- scale(df[cols])
df[cols]

# Linear model of the standardized count on Group, treated as categorical.
mod <- lm(count ~ factor(Group), data = df)
summary(mod)

Call:
lm(formula = count ~ factor(Group), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.9882 -0.7652 -0.2453  0.5215  3.9111 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)  
(Intercept)    -0.08236    0.09075  -0.908   0.3645  
factor(Group)1  0.23981    0.12475   1.922   0.0551 .
factor(Group)2  0.06943    0.12497   0.556   0.5787  
factor(Group)3  0.01682    0.12235   0.137   0.8907  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9983 on 536 degrees of freedom
Multiple R-squared:  0.008999,  Adjusted R-squared:  0.003452 
F-statistic: 1.622 on 3 and 536 DF,  p-value: 0.1831
# Treat the two design variables as nominal factors from here on.
df$Group <- factor(df$Group)
df$phase <- factor(df$phase)

library(plyr)

# Five-number summary (plus mean) of count within each Group x phase cell.
ddply(df, ~ Group * phase, function(cell) summary(cell$count))

# Mean and standard deviation of count within each Group x phase cell.
ddply(
  df,
  ~ Group * phase,
  summarise,
  count.mean = mean(count),
  count.sd = sd(count)
)
# Histograms of count for every Group x phase cell.
# Iterate over the observed levels of both factors instead of repeating one
# hist() call per cell; factor() is a no-op when the column is already a
# factor and drops levels that do not occur in the data.
for (g in levels(factor(df$Group))) {
  for (p in levels(factor(df$phase))) {
    cell_counts <- df$count[df$Group == g & df$phase == p]
    # hist() errors on an empty vector, so skip combinations with no rows.
    if (length(cell_counts) == 0) {
      next
    }
    hist(
      cell_counts,
      main = paste0("Histogram of count (Group ", g, ", phase ", p, ")"),
      xlab = "count"
    )
  }
}

boxplot(count ~ Group * phase, data = df, xlab = "Group.phase", ylab = "count")

# Interaction plot of the cell means.
# count has been standardized (centered on 0), so many cell means are
# negative. Forcing the axis to start at 0 would clip them, so let
# interaction.plot() derive the y limits from the cell means themselves.
with(df, interaction.plot(Group, phase, count))

# Libraries for the linear mixed model (LMM) fitted to count below

library(lme4)
library(lmerTest)
library(car)
# Use sum-to-zero contrasts for both factors so that the Type III tests
# computed below are meaningful.
# These must be assignments (`<-`). The previous `<=` merely compared the
# existing contrast matrix with the string "contr.sum" and printed a logical
# matrix, leaving R's default treatment contrasts in place.
contrasts(df$Group) <- "contr.sum"
contrasts(df$phase) <- "contr.sum"
# Linear mixed model for count: fixed effects Group and phase within Group
# (Group / phase expands to Group + Group:phase), with a random intercept per
# Student. Fitted by maximum likelihood (REML = FALSE).
full.model <- lmer(
  count ~ Group / phase + (1 | Student),
  data = df,
  REML = FALSE
)

# Type III tests of the fixed effects.
# The argument is spelled `test.statistic`; the former `test.statistics` was
# not a recognized argument and was silently ignored, so Wald chi-square tests
# were reported. That is now stated explicitly.
# NOTE(review): if F tests are wanted instead, use test.statistic = "F";
# car documents these for lmer models fit with REML = TRUE and relies on the
# pbkrtest package -- confirm before switching.
Anova(full.model, type = 3, test.statistic = "Chisq")
Analysis of Deviance Table (Type III Wald chisquare tests)

Response: count
              Chisq Df Pr(>Chisq)    
(Intercept)  6.9240  1   0.008505 ** 
Group        3.3519  3   0.340483    
Group:phase 47.2831 16  6.151e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpkZjwtcmVhZC5jc3YoImlucHV0L2NvbW1pdHMuY3N2IiwgaGVhZGVyID1UUlVFLCBzZXA9IiwiKQpkZgpgYGAKCmBgYHtyfQpkZiRjb3VudCA8LSBsb2coZGYkY291bnQrMSkgCmBgYAoKCmBgYHtyfQojIHN0YW5kYXJkaXppbmcgdmFyaWFibGVzIGZvciBza2lsbHMgYW5kIGFzcGlyYXRpb25zLiAKY29scyA8LSBjKCJsb2dfY291bnQiKQpkZltjb2xzXSA8LSBzY2FsZShkZltjb2xzXSkKYGBgCgoKYGBge3J9Cm1vZCA8LSBsbShjb3VudCB+IGZhY3RvcihHcm91cCksIGRhdGEgPSBkZikKc3VtbWFyeShtb2QpCmBgYApgYGB7cn0KIyBjb252ZXJ0IHRvIG5vbWluYWwgZmFjdG9yCmRmJEdyb3VwID0gZmFjdG9yKGRmJEdyb3VwKQpkZiRwaGFzZSA9IGZhY3RvcihkZiRwaGFzZSkKYGBgCgpgYGB7cn0KbGlicmFyeShwbHlyKQpkZHBseShkZiwgfiBHcm91cCAqIHBoYXNlLCBmdW5jdGlvbihkYXRhKSBzdW1tYXJ5KGRhdGEkY291bnQpICkKZGRwbHkoZGYsIH4gR3JvdXAgKiBwaGFzZSwgc3VtbWFyaXNlLCBjb3VudC5tZWFuPW1lYW4oY291bnQpLCBjb3VudC5zZCA9IHNkKGNvdW50KSkKYGBgCmBgYHtyfQojIGhpc3RvZ3JhbXMgZm9yIHR3byBmYWN0b3JzCmhpc3QoZGZbZGYkR3JvdXAgPT0gMCAmIGRmJHBoYXNlID09IDEsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMCAmIGRmJHBoYXNlID09IDIsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMCAmIGRmJHBoYXNlID09IDMsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMCAmIGRmJHBoYXNlID09IDQsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMCAmIGRmJHBoYXNlID09IDUsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMSAmIGRmJHBoYXNlID09IDEsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMSAmIGRmJHBoYXNlID09IDIsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMSAmIGRmJHBoYXNlID09IDMsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMSAmIGRmJHBoYXNlID09IDQsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMSAmIGRmJHBoYXNlID09IDUsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMiAmIGRmJHBoYXNlID09IDEsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMiAmIGRmJHBoYXNlID09IDIsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMiAmIGRmJHBoYXNlID09IDMsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMiAmIGRmJHBoYXNlID09IDQsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMiAmIGRmJHBoYXNlID09IDUsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMyAmIGRmJHBoYXNlID09IDEsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMyAmIGRmJHBoYXNlID09IDIsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMyAmIGRm
JHBoYXNlID09IDMsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMyAmIGRmJHBoYXNlID09IDQsXSRjb3VudCApCmhpc3QoZGZbZGYkR3JvdXAgPT0gMyAmIGRmJHBoYXNlID09IDUsXSRjb3VudCApCmJveHBsb3QoY291bnQgfiBHcm91cCAqIHBoYXNlLCBkYXRhID0gZGYsIHhsYWI9Ikdyb3VwLnBoYXNlIiwgeWxhYj0iY291bnQiKQp3aXRoKGRmLCBpbnRlcmFjdGlvbi5wbG90KEdyb3VwLCBwaGFzZSwgY291bnQsIHlsaW09YygwLCBtYXgoY291bnQpKSkpICMgaW50ZXJhY3Rpb24gcGxvdApgYGAKCmBgYHtyfQojIGxpYnJhcnkgZm9yIExNTSB3ZSB3aWxsIHVzZSBvbiByZWxhdGlvbmFsIG5vdmVsdHkgCgpsaWJyYXJ5KGxtZTQpCmxpYnJhcnkobG1lclRlc3QpCmxpYnJhcnkoY2FyKQpgYGAKCmBgYHtyfQpjb250cmFzdHMoZGYkR3JvdXApIDw9ICJjb250ci5zdW0iCmNvbnRyYXN0cyhkZiRwaGFzZSkgPD0gImNvbnRyLnN1bSIKYGBgCmBgYHtyfQpmdWxsLm1vZGVsID0gbG1lciggY291bnQgfiBHcm91cC9waGFzZSArICgxIHwgU3R1ZGVudCksIGRhdGEgPSBkZiwgUkVNTCA9IEZBTFNFKQpBbm92YShmdWxsLm1vZGVsLCB0eXBlPTMsIHRlc3Quc3RhdGlzdGljcz0iRiIpCmZ1bGwubW9kZWwKYGBgCmBgYHtyfQoKYGBgCgo=